#!/usr/bin/env python
# coding=utf-8
# Run a Baidu search and save each result page as an HTML file, e.g. https://www.baidu.com/s?wd=python3&pn=10

import os
from urllib.request import Request, urlopen
from urllib.parse import urlencode

# Headers sent with every search request: a desktop Chrome User-Agent string.
default_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
}
# Base search URL; the url-encoded query string is appended right after "?".
url = "https://www.baidu.com/s?"
# Output directory (relative to the current working directory) for saved pages.
file_name = "baidu_pages"
# exist_ok=True removes the check-then-create race of a separate
# os.path.exists() test and is a no-op when the directory already exists.
os.makedirs(file_name, exist_ok=True)


def get_search(wd, pn):
    """Download one page of search results and save the raw HTML to disk.

    The page is written to ``<file_name>/<wd>-<pn>.html`` where ``file_name``
    is the module-level output directory. Characters in ``wd`` that cannot
    appear in a file name (path separators and ``: * ? " < > |``) are
    replaced with ``_`` in the file name only; the query itself is sent
    unchanged.

    Args:
        wd: Search keyword, sent as the ``wd`` query parameter.
        pn: 1-based page number. It is converted to the ``pn`` query
            parameter as the offset ``(pn - 1) * 10``.

    Raises:
        RuntimeError: If the response status is not 200.
        OSError: If the request fails (``urlopen`` errors derive from
            ``OSError``) or the output file cannot be written.
    """
    params = {"wd": wd, "pn": (pn - 1) * 10}
    request = Request(url + urlencode(params), headers=default_headers)

    # The response is a context manager; using it guarantees the connection
    # is released even if the status check or the read fails.
    with urlopen(request) as resp:
        # Explicit check rather than `assert`, which is removed under
        # `python -O` and would let a non-200 body be saved silently.
        if resp.status != 200:
            raise RuntimeError(
                "unexpected HTTP status %s for %s" % (resp.status, request.full_url)
            )
        body = resp.read()

    # Keep the keyword from escaping the output directory ("../x") or naming
    # a non-existent sub-directory ("TCP/IP"), and avoid characters that are
    # rejected in file names on Windows.
    safe_wd = wd.translate(str.maketrans(dict.fromkeys('\\/:*?"<>|', "_")))
    target = os.path.join(file_name, "%s-%s.html" % (safe_wd, pn))

    with open(target, "wb") as f:
        f.write(body)
    print("%s-%s保存成功！" % (wd, pn))


if __name__ == '__main__':
    # Ask for the search keyword until something other than whitespace is given.
    while True:
        wd = input("请输入要查询的内容：")
        if wd.strip():
            break
        print("不能为空！")

    # Ask for the number of pages to download; a blank answer means 5.
    while True:
        answer = input("请输入页数(默认5页):").strip()
        if not answer:
            page_count = 5
            break
        # isdecimal() only accepts characters int() can parse; isdigit() also
        # accepts e.g. superscript digits, which make int() raise ValueError.
        # Zero is rejected too, because the prompt asks for a positive integer
        # and a count of 0 would download nothing without any feedback.
        if answer.isdecimal() and int(answer) > 0:
            page_count = int(answer)
            break
        print("请输入正整数或直接回车采用默认！")

    # Pages are numbered from 1; a separate loop variable avoids rebinding
    # the page count while iterating.
    for page in range(1, page_count + 1):
        get_search(wd, page)
